# --- Environment setup: packages, shared utilities, simulated data ---
# necessary packages #
#using Pkg
#Pkg.add("Distances")
using Distributions
using Random
using Distances
using LinearAlgebra
using SparseArrays
using IterativeSolvers
using ProgressMeter
using JLD2
# NOTE(review): ".j" is an unusual extension for a Julia source file — confirm
# this should not be "../../util.jl" (expected to provide getAD, colnorm, etc.).
include("../../util.j")
# unnecessary packages #
#using Pkg
#Pkg.add("UnicodePlots")
using UnicodePlots # check the structure of the sparse matrix
using BenchmarkTools
using StatsPlots
using MCMCChains
using PrettyTables
#using Pkg
#Pkg.add("ProgressMeter");
# Load simulated data and bookkeeping objects (Y_ord, X_ord, coords_ord, NN,
# S, index sets, prior hyperparameters, ...) into the global scope.
@load "../data/sim2data.jld";
# preallocation #
#F = Array{Float64,2}(undef, n , 3); # preallocate the matrix F
# Conditional means of responses missing at observed locations (one row per
# missing site, one column per response).
μ_m1 = Array{Float64, 2}(undef, length(M1_ind), q);
μ_m2 = Array{Float64, 2}(undef, length(M2_ind), q);
nIndx = length(NN.nnIndx);
# NNGP factor ingredients per latent process: A holds neighbor weights,
# D the conditional variances (filled in-place by getAD).
A1 = Array{Float64}(undef, nIndx); D1 = Array{Float64}(undef, n);
A2 = Array{Float64}(undef, nIndx); D2 = Array{Float64}(undef, n);
# NOTE(review): the four lines below bind the *type* SparseMatrixCSC{Float64,Int64},
# not an instance — no memory is actually preallocated. The bindings are
# overwritten with real sparse matrices in the first MCMC iteration.
I_A1 = SparseMatrixCSC{Float64,Int64};
I_A2 = SparseMatrixCSC{Float64,Int64};
A1_new = Array{Float64}(undef, nIndx); D1_new = Array{Float64}(undef, n);
A2_new = Array{Float64}(undef, nIndx); D2_new = Array{Float64}(undef, n);
I_A1_new = SparseMatrixCSC{Float64,Int64};
I_A2_new = SparseMatrixCSC{Float64,Int64};
# Augmented response/design stacking the data over S with the Gaussian priors
# on β (regression coefficients) and Λ (factor loadings).
Ystar = vcat(Y_ord[S, :], Lβ.L \ μβ, LΛ.L \ μΛ); # will be updated after imputing missing response
Xstar = vcat([X_ord[S, :] fill(0.0, n, K)], [inv(Lβ.L) fill(0.0, p, K)],
[fill(0.0, K, p) inv(LΛ.L)]);
# Scratch buffers for the matrix-normal-inverse-Wishart conjugate update.
Ψstar = fill(0.0, q, q); νstar = νΣ + n;
μγstar = vcat(μβ, μΛ); Vγstar = fill(0.0, p + K, p + K);
Y_Xm = fill(0.0, n, q); invVγstar = fill(0.0, p + K, p + K);
# MCMC sampling algorithm.
# Q1: priors for $\nu_i$?
# Q2: $\phi_i$ may not be consistent, since the order can change.
# Preallocation for MCMC samples and initialization #
N_sam = 20000;
# Chains: γ stacks β (first p rows) over Λ (next K rows); Σ is the q×q
# residual covariance. The +1 slot holds the initial values.
γ_sam = Array{Float64, 3}(undef, p + K, q, N_sam + 1);
Σ_sam = Array{Float64, 3}(undef, q, q, N_sam + 1);
F_sam = Array{Float64, 3}(undef, n, K, N_sam);
# Imputed missing responses at observed locations, one column per iteration.
Y_m1_sam = Array{Float64, 2}(undef, length(M1_ind), N_sam);
Y_m2_sam = Array{Float64, 2}(undef, length(M2_ind), N_sam);
A_sam = Array{Float64, 1}(undef, N_sam); # acceptance rate
lh_old = 1; lh_new = 1; # record the likelihood for updating ranges
ϕ_sam = Array{Float64, 2}(undef, K, N_sam + 1);
# Initial values (hard-coded for p = q = K = 2): β = 0, Λ = I, moderate Σ, ϕ = 6.
γ_sam[:, :, 1] = vcat([[0.0 0.0]; [0.0 0.0]], [[1.0 0.0]; [0.0 1.0]]);
Σ_sam[:, :, 1] = [[0.5 0.1]; [0.1 0.5]];
ϕ_sam[:, 1] = [6, 6];
precond_D = Array{Float64, 1}(undef, K * n);
RWM_scale = 0.5; # random-walk metropolis step size scale
# for loop for MCMC chain #
# Gibbs-within-Metropolis sampler. Per iteration:
#   1. LSMR-based conditional draw of the latent factors F,
#   2. bivariate-normal imputation of missing responses over S,
#   3. conjugate MNIW update of (γ, Σ),
#   4. random-walk Metropolis update of the spatial decay parameters ϕ.
# NOTE(review): getAD, colnorm, NN, Y_ord, X_ord, S, perm_ind, obs_ind,
# index_S, nnIndx_row/col, ϕL, ϕU, ΨΣ come from util.j(l) / sim2data.jld.
prog = Progress(N_sam, 1, "Computing initial pass...", 50)
for l in 1:N_sam
    # These names are REASSIGNED inside the loop and must refer to the
    # module-level bindings. Without `global`, Julia's soft-scope rule in a
    # non-interactive script creates fresh locals each iteration, so
    # I_A1/I_A2/D1/D2 would be undefined from l == 2 onward (UndefVarError).
    global I_A1, I_A2, D1, D2, lh_old, lh_new
    # Build the matrix D_Sigma_o^{1/2}: block-diagonal inverse-sqrt of the
    # observed-response covariance, with a 1x1 block where a single response is
    # observed and a 2x2 block where both are, keyed by the pattern code in index_S.
    Dic_diag = Dict(2^0 => sparse(1I, 1, 1) * (1 / sqrt(Σ_sam[:, :, l][1, 1])),
                    2^1 => sparse(1I, 1, 1) * (1 / sqrt(Σ_sam[:, :, l][2, 2])),
                    (2^0 + 2^1) => sparse(sqrt(inv(Σ_sam[:, :, l]))));
    invD = blockdiag([Dic_diag[i] for i in index_S if i > 0]...);
    # Build the NNGP factors (I - A) and D for the first iteration; afterwards
    # they are refreshed only inside the Metropolis step upon acceptance.
    if l == 1
        getAD(coords_ord[:, S], NN.nnIndx, NN.nnDist, NN.nnIndxLU, ϕ_sam[1, l], 0.5, A1, D1);
        getAD(coords_ord[:, S], NN.nnIndx, NN.nnDist, NN.nnIndxLU, ϕ_sam[2, l], 0.5, A2, D2);
        I_A1 = sparse(nnIndx_row, nnIndx_col, vcat(-A1, ones(n)));
        I_A2 = sparse(nnIndx_row, nnIndx_col, vcat(-A2, ones(n)));
    end
    # Build Ytilde / Xtilde: whitened response and design for the conditional
    # Gaussian of the stacked latent factors.
    Ytilde = vcat(invD * vcat(Y_ord[S1_ind, 1] - X_ord[S1_ind, :] * γ_sam[1:p, 1, l],
                              Y_ord[S2_ind, 2] - X_ord[S2_ind, :] * γ_sam[1:p, 2, l])[perm_ind],
                  zeros(K * n));
    Xtilde = vcat(invD * kron(sparse(transpose(γ_sam[(p + 1):(p + K), :, l])),
                              sparse(1:N, 1:N, ones(N)))[obs_ind,
                                                         vcat(S, S .+ N)][perm_ind, :],
                  blockdiag(Diagonal(1 ./ sqrt.(D1)) * I_A1, Diagonal(1 ./ sqrt.(D2)) * I_A2));
    # Use LSMR on a column-norm-preconditioned system to generate a sample of F.
    nsam = length(Ytilde);
    Precond_D = colnorm(Xtilde);
    F_sam[:, :, l] = reshape(Diagonal(1 ./ Precond_D) *
                             lsmr(Xtilde * Diagonal(1 ./ Precond_D),
                                  Ytilde + rand(Normal(), nsam)), :, K);
    # Impute missing responses over S from the bivariate-normal conditionals.
    Xstar[1:n, (p + 1):(p + K)] = F_sam[:, :, l]; # update matrix Xstar with F
    mul!(μ_m1, Xstar[M1_Sind, :], γ_sam[:, :, l]);
    mul!(μ_m2, Xstar[M2_Sind, :], γ_sam[:, :, l]);
    Y_m1_sam[:, l] = μ_m1[:, 1] + (Σ_sam[1, 2, l] / Σ_sam[2, 2, l]) *
        (Y_ord[M1_ind, 2] - μ_m1[:, 2]) +
        rand(Normal(0, sqrt(Σ_sam[1, 1, l] - Σ_sam[1, 2, l]^2 / Σ_sam[2, 2, l])), length(M1_ind));
    Y_m2_sam[:, l] = μ_m2[:, 2] + (Σ_sam[2, 1, l] / Σ_sam[1, 1, l]) *
        (Y_ord[M2_ind, 1] - μ_m2[:, 1]) +
        rand(Normal(0, sqrt(Σ_sam[2, 2, l] - Σ_sam[2, 1, l]^2 / Σ_sam[1, 1, l])), length(M2_ind));
    # Use MNIW conjugacy to sample γ and Σ.
    Ystar[M1_Sind, 1] = Y_m1_sam[:, l]; # update Ystar with imputed response
    Ystar[M2_Sind, 2] = Y_m2_sam[:, l];
    invVγstar = cholesky(Xstar'Xstar);
    mul!(μγstar, transpose(Xstar), Ystar);
    μγstar = invVγstar.U \ (invVγstar.L \ μγstar);
    Y_Xm = BLAS.gemm('N', 'N', -1.0, Xstar, μγstar) + Ystar;
    mul!(Ψstar, transpose(Y_Xm), Y_Xm); BLAS.axpy!(1.0, ΨΣ, Ψstar);
    # Single matrix draw — no need for the 1-element vector form rand(d, 1)[1].
    Σ_sam[:, :, l + 1] = rand(InverseWishart(νstar, Ψstar)); # sample Σ
    γ_sam[:, :, l + 1] = (invVγstar.U \ reshape(rand(Normal(), (p + K) * q), (p + K), q)) *
        cholesky(Σ_sam[:, :, l + 1]).U + μγstar; # sample γ
    # Random-walk Metropolis update of the ranges ϕ (flat prior on (ϕL, ϕU)).
    ϕ_sam[:, l + 1] = ϕ_sam[:, l] + RWM_scale * rand(Normal(), K); # propose next sample point
    if all(x -> (x < ϕU && x > ϕL), ϕ_sam[:, l + 1])
        lh_old = -0.5 * (sum(log.(D1)) + sum(log.(D2)) +
                         norm((I_A1 * F_sam[:, 1, l]) ./ sqrt.(D1))^2 +
                         norm((I_A2 * F_sam[:, 2, l]) ./ sqrt.(D2))^2);
        getAD(coords_ord[:, S], NN.nnIndx, NN.nnDist, NN.nnIndxLU, ϕ_sam[1, l + 1], 0.5, A1_new, D1_new);
        getAD(coords_ord[:, S], NN.nnIndx, NN.nnDist, NN.nnIndxLU, ϕ_sam[2, l + 1], 0.5, A2_new, D2_new);
        I_A1_new = sparse(nnIndx_row, nnIndx_col, vcat(-A1_new, ones(n)));
        I_A2_new = sparse(nnIndx_row, nnIndx_col, vcat(-A2_new, ones(n)));
        lh_new = -0.5 * (sum(log.(D1_new)) + sum(log.(D2_new)) +
                         norm((I_A1_new * F_sam[:, 1, l]) ./ sqrt.(D1_new))^2 +
                         norm((I_A2_new * F_sam[:, 2, l]) ./ sqrt.(D2_new))^2);
        # Clamp at 1 so A_sam stores a genuine acceptance probability and
        # mean(A_sam) estimates the acceptance rate; the accept/reject decision
        # is unchanged because rand() ∈ [0, 1).
        A_sam[l] = min(1.0, exp(lh_new - lh_old));
        if rand() < A_sam[l] # scalar draw (was rand(1)[1])
            I_A1 = copy(I_A1_new); I_A2 = copy(I_A2_new); # accept: swap in proposed factors
            D1 = copy(D1_new); D2 = copy(D2_new);
        else
            ϕ_sam[:, l + 1] = ϕ_sam[:, l]; # Don't update
        end
    else
        A_sam[l] = 0.0;
        ϕ_sam[:, l + 1] = ϕ_sam[:, l]; # Don't update when falling out of the supports
    end
    next!(prog) # monitor the progress
end
# Posterior prediction #
# prediction preparation
N_pre_burn = Integer(trunc(0.75 * N_sam)); # burn-in cutoff for prediction draws
M_ind = setdiff(1:N, S); NM = length(M_ind) # held-out (unobserved) locations
# Predictive draws of latent factors and responses at held-out locations,
# one slice per retained post-burn-in iteration.
F_M_sam = Array{Float64, 3}(undef, NM, K, N_sam - N_pre_burn + 1);
Y_M_sam = Array{Float64, 3}(undef, NM, q, N_sam - N_pre_burn + 1);
# construct Atilde Dtilde #
# R's RANN::nn2 finds, for each held-out site, its m nearest neighbors among
# the observed sites S (nn2 takes points as rows, hence the transposes).
using RCall
@rput coords_ord
@rput S
@rput m
R"""
library("RANN")
nn_mod_ho <- nn2(t(coords_ord[, S]), t(coords_ord[, -S]), k = m)
"""
@rget nn_mod_ho
# Per-site NNGP predictive weights (Atilde) and conditional variances (Dtilde).
Atilde = Array{Float64}(undef, NM * m); Dtilde = Array{Float64}(undef, NM);
MnnIndxLU = collect(1:m:(NM * m + 1)); # look-up: every site has exactly m neighbors
MnnIndx = vec(nn_mod_ho[:nn_idx]');    # neighbor indices, flattened row-major
# Composition sampling at held-out locations: draw F_M | F_S from the NNGP
# predictive conditional, then Y_M | F_M from the response model, per iteration.
for i in N_pre_burn:N_sam
for j in 1:K
# update F
# F_M | F_S ~ N(AtildeM * F_S, diag(Dtilde)), weights refreshed at ϕ_sam[j, i + 1].
getAD(coords_ord[:, S], MnnIndx, vec(nn_mod_ho[:nn_dists]'), MnnIndxLU,
ϕ_sam[j, i + 1], 0.5, Atilde, Dtilde)
AtildeM = sparse(repeat(1:NM, inner = m), MnnIndx, Atilde, NM, n);
F_M_sam[:, j, (i - N_pre_burn + 1)] = AtildeM * F_sam[:, j, i] + sqrt.(Dtilde) .* rand(Normal(), NM)
end
# update Y
# Fixed effects + factor contribution + residual noise (rand(MvNormal, NM)
# returns q×NM, hence the transpose).
Y_M_sam[:, :, (i - N_pre_burn + 1)] = X_ord[M_ind, :] * γ_sam[1:p, :, i + 1] +
F_M_sam[:, :, (i - N_pre_burn + 1)] * γ_sam[(p + 1):(p + K), :, i + 1] +
transpose(rand(MvNormal(Σ_sam[:, :, i + 1]), NM))
end
# Trace plots of the parameter chains via MCMCChains (flattening indices are
# hard-coded for p = q = K = 2).
β_pos_sam = Array{Float64, 3}(undef, N_sam + 1, p * q, 1);
β_pos_sam[:, :, 1] = hcat(γ_sam[1, 1, :], γ_sam[1, 2, :], γ_sam[2, 1, :], γ_sam[2, 2, :]);
β_chain = Chains(β_pos_sam);
pβ = plot(β_chain)
# Factor loadings Λ occupy rows p+1 .. p+K (= 3, 4) of γ.
Λ_pos_sam = Array{Float64, 3}(undef, N_sam + 1, K * q, 1);
Λ_pos_sam[:, :, 1] = hcat(γ_sam[3, 1, :], γ_sam[3, 2, :], γ_sam[4, 1, :], γ_sam[4, 2, :]);
Λ_chain = Chains(Λ_pos_sam);
pΛ = plot(Λ_chain)
ϕ_pos_sam = Array{Float64, 3}(undef, N_sam + 1, K, 1);
ϕ_pos_sam[:, :, 1] = hcat(ϕ_sam[1, :], ϕ_sam[2, :]);
ϕ_chain = Chains(ϕ_pos_sam);
pϕ = plot(ϕ_chain)
Σ_pos_sam = Array{Float64, 3}(undef, N_sam + 1, q * q, 1);
Σ_pos_sam[:, :, 1] = hcat(Σ_sam[1, 1, :], Σ_sam[1, 2, :], Σ_sam[2, 1, :], Σ_sam[2, 2, :]);
Σ_chain = Chains(Σ_pos_sam);
pΣ = plot(Σ_chain)
# Recovered latent process plus intercept at the observed locations.
ω_incp_obs_pos_sam = Array{Float64, 3}(undef, n, q, N_sam);
lll = fill(1.0, (n, 1)); # column of ones to broadcast the intercept row of γ
for i in 1:N_sam
ω_incp_obs_pos_sam[:, :, i] = F_sam[:, :, i] * γ_sam[(p + 1):(p + K), :, i + 1] +
lll * transpose(γ_sam[1, :, i + 1]);
end
truncindex = 1;#Integer(trunc(N_sam / 2));
ω_incp_pos_sam = Array{Float64, 3}(undef, N_sam - truncindex + 1, 3, 1);
# Traces at three selected site/response combinations: (1,1), (1,2), (200,1).
ω_incp_pos_sam[:, :, 1] = hcat(ω_incp_obs_pos_sam[1, 1, truncindex:N_sam],
ω_incp_obs_pos_sam[1, 2, truncindex:N_sam], ω_incp_obs_pos_sam[200, 1, truncindex:N_sam]);
ω_incp_chain = Chains(ω_incp_pos_sam);
# NOTE(review): `pΣ` clobbers the Σ trace plot defined earlier — presumably a
# distinct name (e.g. pω) was intended; confirm before relying on pΣ later.
pΣ = plot(ω_incp_chain)
# check the variance covariance across latent process
cov_pos = Array{Float64, 3}(undef, q, q, N_sam);
for i in 1:N_sam
# Empirical covariance, across locations, of the factor contribution F * Λ.
cov_pos[:, :, i] = cov(F_sam[:, :, i] * γ_sam[(p + 1):(p + K), :, i + 1])
end
# ω_incp_obs_pos_sam[:, :, i]
# Flatten the q×q covariance entries into columns for a Chains trace plot.
cov_pos_sam = Array{Float64, 3}(undef, N_sam, q * q, 1);
cov_pos_sam[:, :, 1] = hcat(cov_pos[1, 1, :], cov_pos[1, 2, :],
cov_pos[2, 1, :], cov_pos[2, 2, :]);
cov_pos_chain = Chains(cov_pos_sam);
pcov = plot(cov_pos_chain)
covω = cov(ω_ord[S, :]) # empirical covariance of the true latent process (truth)
N_Inf_burn = Integer(trunc(0.75 * N_sam)); # burn-in cutoff for inference summaries
# Pointwise posterior 2.5% / 50% / 97.5% quantiles of the latent process.
ω_incp_obs_pos_qt = Array{Float64, 3}(undef, n, q, 3);
for j in 1:q
for i in 1:n
ω_incp_obs_pos_qt[i, j, :] = quantile(ω_incp_obs_pos_sam[i, j, N_Inf_burn:N_sam], [0.025, 0.5, 0.975])
end
end
# count the covarage of 95% CI #
count_ω_incp = fill(0.0, 2); # per-response coverage counts (literal 2 == q here)
for j in 1:q
for i in 1:n
count_ω_incp[j] = count_ω_incp[j] +
((ω_incp_obs_pos_qt[i, j, 1] < ω_incp_obs[S[i], j]) &&
(ω_incp_obs_pos_qt[i, j, 3] > ω_incp_obs[S[i], j]))
end
end
count_ω_incp
count_ω_incp ./ n # empirical coverage proportions
# Summary table: true value vs posterior mean / median / 95% CI per parameter.
summary_table = Array{Float64, 2}(undef, (p - 1) * q + (q * q - 1) * 2 + q, 5);
# Rows 1-2: non-intercept regression slopes β[2, :] (truth from simulation).
summary_table[1, :] = vcat(β[2, 1], mean(γ_sam[2, 1, N_Inf_burn:(N_sam + 1)]),
quantile(γ_sam[2, 1, N_Inf_burn:(N_sam + 1)], [0.5, 0.025, 0.975]));
summary_table[2, :] = vcat(β[2, 2], mean(γ_sam[2, 2, N_Inf_burn:(N_sam + 1)]),
quantile(γ_sam[2, 2, N_Inf_burn:(N_sam + 1)], [0.5, 0.025, 0.975]));
# Rows 3-5: residual covariance Σ (upper triangle).
summary_table[3, :] = vcat(Σ[1, 1], mean(Σ_sam[1, 1, N_Inf_burn:(N_sam + 1)]),
quantile(Σ_sam[1, 1, N_Inf_burn:(N_sam + 1)], [0.5, 0.025, 0.975]));
summary_table[4, :] = vcat(Σ[1, 2], mean(Σ_sam[1, 2, N_Inf_burn:(N_sam + 1)]),
quantile(Σ_sam[1, 2, N_Inf_burn:(N_sam + 1)], [0.5, 0.025, 0.975]));
summary_table[5, :] = vcat(Σ[2, 2], mean(Σ_sam[2, 2, N_Inf_burn:(N_sam + 1)]),
quantile(Σ_sam[2, 2, N_Inf_burn:(N_sam + 1)], [0.5, 0.025, 0.975]));
# Rows 6-8: latent-process covariance; cov_pos_sam columns 1, 2, 4 hold the
# [1,1], [1,2], [2,2] entries respectively.
summary_table[6, :] = vcat(covω[1, 1], mean(cov_pos_sam[N_Inf_burn:N_sam, 1, 1]),
quantile(cov_pos_sam[N_Inf_burn:N_sam, 1, 1], [0.5, 0.025, 0.975]));
summary_table[7, :] = vcat(covω[1, 2], mean(cov_pos_sam[N_Inf_burn:N_sam, 2, 1]),
quantile(cov_pos_sam[N_Inf_burn:N_sam, 2, 1], [0.5, 0.025, 0.975]));
summary_table[8, :] = vcat(covω[2, 2], mean(cov_pos_sam[N_Inf_burn:N_sam, 4, 1]),
quantile(cov_pos_sam[N_Inf_burn:N_sam, 4, 1], [0.5, 0.025, 0.975]));
# Rows 9-10: spatial decay parameters ϕ (true values ϕ1, ϕ2 from simulation).
summary_table[9, :] = vcat(ϕ1, mean(ϕ_sam[1, N_Inf_burn:N_sam]),
quantile(ϕ_sam[1, N_Inf_burn:N_sam], [0.5, 0.025, 0.975]));
summary_table[10, :] = vcat(ϕ2, mean(ϕ_sam[2, N_Inf_burn:N_sam]),
quantile(ϕ_sam[2, N_Inf_burn:N_sam], [0.5, 0.025, 0.975]));
summary_table = round.(summary_table; digits = 3);
rnames = ["β[2, 1]", "β[2, 2]", "Σ[1, 1]", "Σ[1, 2]", "Σ[2, 2]",
"cov(ω)[1, 1]", "cov(ω)[1, 2]", "cov(ω)[2, 2]", "ϕ1", "ϕ2"];
summary_table = [rnames summary_table];
# NOTE(review): a bare `markdown` third positional argument matches the old
# PrettyTables v0.x table-format API; current versions require the keyword
# form `tf = tf_markdown` — confirm against the pinned PrettyTables version.
pretty_table(summary_table, ["" "true" "mean" "median" "2.5%" "97.5%"], markdown)
# posterior prediction check #
# NOTE(review): this N_Inf_burn = 1 is unused below — the Y_M quantile loop
# takes the full chain via `:` — and it is overwritten with 0.75·N_sam before
# the imputation checks; presumably leftover from experimentation.
N_Inf_burn = 1;
Y_M_pos_qt = Array{Float64, 3}(undef, NM, q, 3);
Y_M_pos_mean = Array{Float64, 2}(undef, NM, q);
# Pointwise predictive quantiles and means at the held-out locations.
for j in 1:q
for i in 1:NM
Y_M_pos_qt[i, j, :] = quantile(Y_M_sam[i, j, :], [0.025, 0.5, 0.975]);
Y_M_pos_mean[i, j] = mean(Y_M_sam[i, j, :])
end
end
# count the covarage of 95% CI #
count_Y_M = fill(0.0, 2); # per-response coverage counts over held-out sites
for j in 1:q
for i in 1:NM
count_Y_M[j] = count_Y_M[j] +
((Y_M_pos_qt[i, j, 1] < Y_ord[M_ind[i], j]) &&
(Y_M_pos_qt[i, j, 3] > Y_ord[M_ind[i], j]))
end
end
count_Y_M
count_Y_M ./ NM # coverage proportions at held-out locations
N_Inf_burn = Integer(trunc(0.75 * N_sam));
# Quantiles/means of the imputed missing responses at observed locations.
Y_m1_pos_qt = Array{Float64, 2}(undef, length(M1_ind), 3);
Y_m1_pos_mean = Array{Float64}(undef, length(M1_ind));
Y_m2_pos_qt = Array{Float64, 2}(undef, length(M2_ind), 3);
Y_m2_pos_mean = Array{Float64}(undef, length(M2_ind));
for i in 1:length(M1_ind)
Y_m1_pos_qt[i, :] = quantile(Y_m1_sam[i, N_Inf_burn:N_sam], [0.025, 0.5, 0.975]);
Y_m1_pos_mean[i] = mean(Y_m1_sam[i, N_Inf_burn:N_sam])
end
for i in 1:length(M2_ind)
Y_m2_pos_qt[i, :] = quantile(Y_m2_sam[i, N_Inf_burn:N_sam], [0.025, 0.5, 0.975]);
Y_m2_pos_mean[i] = mean(Y_m2_sam[i, N_Inf_burn:N_sam])
end
# NOTE(review): count_Y_M is NOT reset here, so the imputation coverage counts
# below accumulate on top of the held-out-location counts above yet are then
# divided by 200 — verify whether a `count_Y_M = fill(0.0, 2)` reset is
# missing before this loop.
for i in 1:length(M1_ind)
count_Y_M[1] = count_Y_M[1] +
((Y_m1_pos_qt[i, 1] < Y_ord[M1_ind[i], 1]) &&
(Y_m1_pos_qt[i, 3] > Y_ord[M1_ind[i], 1]))
end
for i in 1:length(M2_ind)
count_Y_M[2] = count_Y_M[2] +
((Y_m2_pos_qt[i, 1] < Y_ord[M2_ind[i], 2]) &&
(Y_m2_pos_qt[i, 3] > Y_ord[M2_ind[i], 2]))
end
count_Y_M
count_Y_M ./ 200 # NOTE(review): hard-coded 200 — presumably |M1_ind| + |M2_ind|; confirm
# calculate root mean square predictive error #
# NOTE(review): hard-coded denominator 2 * 200 — confirm it equals the total
# number of predicted values (NM * q + |M1_ind| + |M2_ind|).
MSPE = (sum((Y_ord[M_ind, :] - Y_M_pos_mean).^2) + sum((Y_m1_pos_mean - Y_ord[M1_ind, 1]).^2 )
+ sum((Y_m2_pos_mean - Y_ord[M2_ind, 2]).^2)) / (2 * 200)
RMSPE = sqrt(MSPE); RMSPE
# Persist the latent-process credible intervals for downstream comparison.
@save "../results/ω_incp_obs_pos_qt_BSLMC.jld" ω_incp_obs_pos_qt